# -*- coding: utf-8 -*-
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from njsc.rules import *


class CatalogSpider(BaseSpider):
    """Spider that collects the category tree exposed by the njsc365 JSON API.

    Flow:
      1. ``start_requests`` fetches ``main_url``.
      2. ``parse_catalog`` emits the parsed categories as one ``Box`` and
         schedules a follow-up request for every category whose ``level``
         is 2.
      3. ``parse_catalog3`` emits the categories parsed from each follow-up
         response as another ``Box``.

    NOTE(review): ``Box``, ``parse_catalog`` and ``parse_catalog3`` (the
    module-level parsers) are presumably provided by
    ``from njsc.rules import *``; ``self.batch_no`` and ``self.error_back``
    are presumably provided by ``BaseSpider`` -- confirm.
    """

    name = 'catalog'

    # Main index endpoint; the first request of the crawl goes here.
    main_url = 'http://www.njsc365.com/api/index.php/index.html?output=json'
    # Level-3 category endpoint; `{}` is filled with the parent catalogId.
    catalog3_url = 'http://www.njsc365.com/api/index.php/list-{}.html?output=json'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any other spider arguments to the base class."""
        super(CatalogSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def start_requests(self):
        """Yield the single seed request for the main index endpoint."""
        yield Request(
            url=self.main_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no
            },
            callback=self.parse_catalog,
            errback=self.error_back,
            priority=100
        )

    def parse_catalog(self, response):
        """Emit the categories parsed from the index and request their children.

        Yields one ``Box`` holding every parsed category, followed by one
        ``Request`` per category whose ``level`` equals 2. Logs an error and
        yields nothing when the parser returns no categories.
        """
        # The bare name resolves to the module-level parser function, not to
        # this method (methods are only reachable through ``self``).
        cats_2 = parse_catalog(response)
        if not cats_2:
            self.logger.error('无分类: [%s] url -> [%s]', self.batch_no, self.main_url)
            return

        self.logger.info('分类2: count[%s]', len(cats_2))
        yield Box('catalog', self.batch_no, cats_2)
        for cat in cats_2:
            # Only level-2 entries are expanded into level-3 requests.
            if cat.get('level') != 2:
                continue
            cat2_id = cat.get("catalogId")
            yield Request(
                url=self.catalog3_url.format(cat2_id),
                meta={
                    'reqType': 'catalog',
                    'batchNo': self.batch_no,
                    'parentId': cat2_id
                },
                callback=self.parse_catalog3,
                errback=self.error_back,
                priority=100
            )

    def parse_catalog3(self, response):
        """Emit the categories parsed from one level-3 category response.

        Yields a single ``Box`` with the parsed categories. When the parser
        returns ``None`` or an empty collection, a warning is logged and
        nothing is yielded, mirroring the empty-result guard in
        ``parse_catalog`` (previously ``len(None)`` raised ``TypeError``).
        """
        # Resolves to the module-level parser function, not to this method.
        cats_3 = parse_catalog3(response)
        if not cats_3:
            self.logger.warning(
                '无三级分类: [%s] parentId -> [%s] url -> [%s]',
                self.batch_no, response.meta.get('parentId'), response.url
            )
            return

        self.logger.info('分类3: count[%s]', len(cats_3))
        yield Box('catalog', self.batch_no, cats_3)


